Previous script: “06_get_eigengene_QTL.Rmd”
The goal is to find QTL peaks for the WGCNA eigen genes and see if those overalp with any growth QTL. We are only focusing on eigen genes that correlated with some growth traits/paramters.
library(qtl)
library(tidyverse)
[30m── [1mAttaching packages[22m ────────────────────────────────────────── tidyverse 1.2.1 ──[39m
[30m[32m✔[30m [34mggplot2[30m 2.2.1 [32m✔[30m [34mpurrr [30m 0.2.4
[32m✔[30m [34mtibble [30m 1.3.4 [32m✔[30m [34mdplyr [30m 0.7.4
[32m✔[30m [34mtidyr [30m 0.7.2 [32m✔[30m [34mstringr[30m 1.3.0
[32m✔[30m [34mreadr [30m 1.1.1 [32m✔[30m [34mforcats[30m 0.2.0[39m
package ‘stringr’ was built under R version 3.4.3[30m── [1mConflicts[22m ───────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31m✖[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()[39m
library(stringr)
load("../output/scanone-eigengene-qtl_2012.RData")
threshold.95 <- tibble(perm.threshold=lod.thrs[5,],
trait=colnames(lod.thrs))
scanone.gather <- scanone_eigen %>%
gather(key = trait, value = LOD, -chr, -pos) %>%
mutate(condition=str_sub(trait,1,2), color=str_sub(trait,6,100)) %>%
left_join(threshold.95)
Joining, by = "trait"
scanone.gather
pl.UN <- scanone.gather %>% filter(condition=="UN") %>%
ggplot(aes(x=pos,y=LOD)) +
geom_line() +
geom_hline(aes(yintercept=perm.threshold),lty=2,lwd=.5,alpha=.5) +
facet_grid(trait ~ chr, scales="free") +
theme(strip.text.y = element_text(angle=0), axis.text.x = element_text(angle=90)) +
ggtitle("UN Eigen Gene QTL")
pl.UN
ggsave("../output/eigen gene eQTL UN 2012.pdf",width=12,height=8)
For each eigen gene, find QTL borders and look for overlap with growth QTL
For each eigen gene first identify chromosomes with “significant” peaks (in this case > 99% permuation threshold) and then runs bayesint() on them to define the intervals
sig.chrs <- scanone.gather %>% filter(LOD > perm.threshold) %>%
group_by(trait,chr) %>%
summarize(unique(chr))
sig.chrs
now for each significant chromosome/trait combo run bayesint
bayesint.list <- apply(sig.chrs,1,function(hit) {
result <- bayesint(scanone_eigen[c("chr","pos",hit["trait"])],
chr=hit["chr"],
lodcolumn = 1,
expandtomarkers = TRUE
)
colnames(result)[3] <- "LOD"
result
})
names(bayesint.list) <- sig.chrs$trait
bayesint.list <- lapply(bayesint.list,function(x) x %>%
as.data.frame() %>%
rownames_to_column(var="markername") %>%
mutate(chr=as.character(chr))
)
bayesint.result <- as.tibble(bind_rows(bayesint.list,.id="trait")) %>%
select(trait,chr,pos,markername,LOD) %>%
separate(markername,into=c("chr1","Mbp"),sep="x", convert=TRUE) %>%
group_by(trait,chr) %>%
summarize(start=min(Mbp),end=max(Mbp),min_eQTL_LOD=min(LOD),max_eQTL_LOD=max(LOD)) %>%
#for the high QTL peaks the interval width is 0. That is overly precise and need to widen those.
mutate(start=ifelse(start==end,max(0,start-20000),start), end=ifelse(start==end,end+20000,end))
Too few values at 3 locations: 20, 35, 38
bayesint.result
Load annotation
BrapaAnnotation <- read_csv("../input/Brapa_V1.5_annotated.csv")
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
X1 = col_integer(),
name = col_character(),
chrom = col_character(),
start = col_integer(),
end = col_integer(),
subject = col_character(),
AGI = col_character(),
At_symbol = col_character(),
At_description = col_character(),
perc_ID = col_double(),
aln_length = col_integer(),
mismatch = col_integer(),
gap_open = col_integer(),
qstart = col_integer(),
qend = col_integer(),
sstart = col_integer(),
send = col_integer(),
eval = col_double(),
score = col_double()
)
BrapaAnnotation
eigen.annotated <- lapply(1:nrow(bayesint.result),function(row) {
qtl <- bayesint.result[row,]
results <- subset(BrapaAnnotation, chrom==qtl$chr &
start >= qtl$start &
end <= qtl$end)
}
)
names(eigen.annotated) <- bayesint.result$trait
eigen.annotated <- bind_rows(eigen.annotated,.id="trait") %>%
mutate(chrom=as.character(chrom)) %>%
left_join(bayesint.result,by=c("trait","chrom"="chr")) %>% #get eQTL LOD
rename(eigen_eQTL_candidate=name)
eigen.annotated.small <- eigen.annotated %>% select(trait,eigen_eQTL_candidate,ends_with("LOD"))
eigen.annotated.small
given bayesint results, find overlaps with UN growth QTL
QTLgenes <- read_csv("../input/Bayesian2012Height_genesunderQTLS.csv")[,-1] #genes under height QTL peaks
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
.default = col_integer(),
.id = col_character(),
FVTtrait = col_character(),
name = col_character(),
chrom = col_character(),
subject = col_character(),
AGI = col_character(),
At_symbol = col_character(),
At_description = col_character(),
perc_ID = col_double(),
eval = col_double(),
score = col_double()
)
See spec(...) for full column specifications.
QTLgenes
eigen.qtl.combined <- inner_join(eigen.annotated.small,QTLgenes,by=c("eigen_eQTL_candidate"="name")) %>%
select(.id, trait, everything())
eigen.qtl.combined
how many QTL have at least some overlap?
sort(unique(QTLgenes$.id))
[1] "QTL1" "QTL2" "QTL3" "QTL4" "QTL5" "QTL6" "QTL7" "QTL8" "QTL9"
sort(unique(eigen.qtl.combined$.id))
[1] "QTL1" "QTL2" "QTL4" "QTL6" "QTL7" "QTL8" "QTL9"
All
are all eigen genes overlapping?
unique(eigen.annotated.small$trait)
[1] "UN_MEblue" "UN_MEbrown" "UN_MEcyan"
[4] "UN_MEdarkslateblue" "UN_MEmidnightblue" "UN_MEpurple"
[7] "UN_MEturquoise" "UN_MEyellow" "UN_MEyellowgreen"
unique(eigen.qtl.combined$trait)
[1] "UN_MEblue" "UN_MEbrown" "UN_MEcyan"
[4] "UN_MEdarkslateblue" "UN_MEmidnightblue" "UN_MEpurple"
[7] "UN_MEturquoise" "UN_MEyellow"
No, 8 of 10
write_csv(eigen.qtl.combined,"../output/Bayesian2012Height_genesunderQTLS_eigenQTL_overlap.csv")
threshold.95 <- tibble(perm.threshold=lod.thrs.cim[5,],
trait=colnames(lod.thrs.cim))
scanone.gather <- scanone_eigen_cim %>%
gather(key = trait, value = LOD, -chr, -pos) %>%
mutate(condition=str_sub(trait,1,2), color=str_sub(trait,6,100)) %>%
left_join(threshold.95)
Joining, by = "trait"
scanone.gather
pl.UN <- scanone.gather %>% filter(condition=="UN") %>%
ggplot(aes(x=pos,y=LOD)) +
geom_line() +
geom_hline(aes(yintercept=perm.threshold),lty=2,lwd=.5,alpha=.5) +
facet_grid(trait ~ chr, scales="free") +
theme(strip.text.y = element_text(angle=0), axis.text.x = element_text(angle=90)) +
ggtitle("UN Eigen Gene QTL")
pl.UN
ggsave("../output/eigen gene eQTL UN CIM 2012.pdf",width=12,height=8)
For each eigen gene, find QTL borders and look for overlap with growth QTL
For each eigen gene first identify chromosomes with “significant” peaks (in this case > 99% permuation threshold) and then runs bayesint() on them to define the intervals
sig.chrs <- scanone.gather %>% filter(LOD > perm.threshold) %>%
group_by(trait,chr) %>%
summarize(unique(chr))
sig.chrs
now for each significant chromosome/trait combo run bayesint
bayesint.list <- apply(sig.chrs,1,function(hit) {
result <- bayesint(scanone_eigen[c("chr","pos",hit["trait"])],
chr=hit["chr"],
lodcolumn = 1,
expandtomarkers = TRUE
)
colnames(result)[3] <- "LOD"
result
})
names(bayesint.list) <- sig.chrs$trait
bayesint.list <- lapply(bayesint.list,function(x) x %>%
as.data.frame() %>%
rownames_to_column(var="markername") %>%
mutate(chr=as.character(chr))
)
bayesint.result <- as.tibble(bind_rows(bayesint.list,.id="trait")) %>%
select(trait,chr,pos,markername,LOD) %>%
separate(markername,into=c("chr1","Mbp"),sep="x", convert=TRUE) %>%
group_by(trait,chr) %>%
summarize(start=min(Mbp),end=max(Mbp),min_eQTL_LOD=min(LOD),max_eQTL_LOD=max(LOD)) %>%
#for the high QTL peaks the interval width is 0. That is overly precise and need to widen those.
mutate(start=ifelse(start==end,max(0,start-20000),start), end=ifelse(start==end,end+20000,end))
Too few values at 2 locations: 17, 26
bayesint.result
Load annotation
BrapaAnnotation <- read_csv("../input/Brapa_V1.5_annotated.csv")
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
X1 = col_integer(),
name = col_character(),
chrom = col_character(),
start = col_integer(),
end = col_integer(),
subject = col_character(),
AGI = col_character(),
At_symbol = col_character(),
At_description = col_character(),
perc_ID = col_double(),
aln_length = col_integer(),
mismatch = col_integer(),
gap_open = col_integer(),
qstart = col_integer(),
qend = col_integer(),
sstart = col_integer(),
send = col_integer(),
eval = col_double(),
score = col_double()
)
BrapaAnnotation
eigen.annotated <- lapply(1:nrow(bayesint.result),function(row) {
qtl <- bayesint.result[row,]
results <- subset(BrapaAnnotation, chrom==qtl$chr &
start >= qtl$start &
end <= qtl$end)
}
)
names(eigen.annotated) <- bayesint.result$trait
eigen.annotated <- bind_rows(eigen.annotated,.id="trait") %>%
mutate(chrom=as.character(chrom)) %>%
left_join(bayesint.result,by=c("trait","chrom"="chr")) %>% #get eQTL LOD
rename(eigen_eQTL_candidate=name)
eigen.annotated.small <- eigen.annotated %>% select(trait,eigen_eQTL_candidate,ends_with("LOD"))
eigen.annotated.small
given bayesint results, find overlaps with UN growth QTL
QTLgenes <- read_csv("../input/Bayesian2012Height_genesunderQTLS.csv")[,-1] #genes under height QTL peaks
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
.default = col_integer(),
.id = col_character(),
FVTtrait = col_character(),
name = col_character(),
chrom = col_character(),
subject = col_character(),
AGI = col_character(),
At_symbol = col_character(),
At_description = col_character(),
perc_ID = col_double(),
eval = col_double(),
score = col_double()
)
See spec(...) for full column specifications.
QTLgenes
eigen.qtl.combined <- inner_join(eigen.annotated.small,QTLgenes,by=c("eigen_eQTL_candidate"="name")) %>%
select(.id, trait, everything())
eigen.qtl.combined
how many QTL have at least some overlap?
unique(QTLgenes$.id)
[1] "QTL1" "QTL2" "QTL3" "QTL4" "QTL5" "QTL6" "QTL7" "QTL8" "QTL9"
unique(eigen.qtl.combined$.id)
[1] "QTL9" "QTL2" "QTL4" "QTL6" "QTL7" "QTL8" "QTL1"
seven of nine
are all eigen genes overlapping?
unique(eigen.annotated.small$trait)
[1] "UN_MEblue" "UN_MEbrown" "UN_MEcyan"
[4] "UN_MEdarkslateblue" "UN_MEmidnightblue" "UN_MEturquoise"
[7] "UN_MEyellowgreen"
unique(eigen.qtl.combined$trait)
[1] "UN_MEbrown" "UN_MEcyan" "UN_MEdarkslateblue"
[4] "UN_MEmidnightblue" "UN_MEturquoise"
No, 5
write_csv(eigen.qtl.combined,"../output/Bayesian2012Height_genesunderQTLS_eigenQTL_overlap_CIM.csv")